# -*- coding: utf-8 -*-
"""
@Project: base_python
@File: articles.py
@Author: PC
@Date: 2025/05/20
@Description: 
"""
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

class ArticleSpider(CrawlSpider):
    """Crawl Wikipedia starting from one article and print page details.

    Every link extracted by the single crawl rule is followed and handed to
    :meth:`parse_items`, which prints the URL, title, body text and
    last-edited date of the page.
    """

    name = 'articles'
    allowed_domains = ['wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/Benevolent_dictator_for_life']

    # The ``allow`` pattern matches any URL and ``follow=True`` keeps the
    # crawl going from each fetched page. ``cb_kwargs`` is forwarded to the
    # callback as keyword arguments, so ``parse_items`` must accept
    # ``is_detail``.
    rules = [
        Rule(
            LinkExtractor(allow=r'.*'),
            callback='parse_items',
            follow=True,
            cb_kwargs={'is_detail': True},
        )
    ]

    def parse_items(self, response, is_detail=False):
        """Print the URL, title, body text and last-edited date of a page.

        Args:
            response: The downloaded page to extract data from.
            is_detail: Flag supplied by the crawl rule through ``cb_kwargs``.
                It is accepted so the rule's keyword argument does not raise
                ``TypeError``; the extraction itself does not depend on it.
        """
        url = response.url
        title = response.css('h1::text').extract_first()
        text = response.xpath('//div[@id="mw-content-text"]//text()').extract()
        last_updated = response.css(
            'li#footer-info-lastmod::text'
        ).extract_first()
        # extract_first() yields None when the footer element is missing;
        # only strip the boilerplate prefix when there is text to strip.
        if last_updated is not None:
            last_updated = last_updated.replace(
                'This page was last edited on ', ''
            )
        print('URL is: {}'.format(url))
        print('Title is: {}'.format(title))
        print('text is: {}'.format(text))
        print('lastUpdated is: {}'.format(last_updated))